## title:   Narcissism in Political Participation
## authors: Z. Fazekas & P.K. Hatemi
## study:   US 2013
## goal:    clean and prepare covariates
##          clean and prepare outcome variables
##          clean and prepare narcissism variables
##          measurement summary
## ----
# 0. packages and helper --
library("psych")
library("dplyr")
library("stringr")
library("semTools")

## 1. load helper functions and data
## helper.R is sourced before anything else; presumably it provides two_sd(),
## which is called in the export step further down -- confirm against helper.R
source("helper.R")

## read the raw survey file and keep only rows where both check flags equal 1:
## correct year indicated & president correct (see footnote in paper)
us <- read.csv("./data/us13.csv") %>%
  dplyr::filter(yearCorrect == 1, presCheck == 1)

## 2. covariates --
## female: 1 = female, 0 = male, NA for any other or missing value of `sex`
us$female <- NA
us$female[us$sex == "male"] <- 0
us$female[us$sex == "female"] <- 1

## dichotomous education: 1 if edu > 4, 0 if edu <= 4, NA if edu is missing
us$edu_cat <- as.numeric(us$edu > 4)

## age: difference between 2013 and the reported birth year
us$age <- 2013 - us$bYear

## ethnicity: 0 if ethnic == 3, 1 for any other observed value.
## Missing `ethnic` stays NA (as for female and edu_cat above) instead of
## being silently counted as not Caucasian.
us$not_caucasian <- as.numeric(us$ethnic != 3)

## political interest: reverse-code Q45 so that higher = more interested
## NOTE(review): value 2 of Q45 is not listed in the recode, so car::recode
## would leave it at 2 (the same score as an original 3). Presumably Q45 only
## takes the values 1, 3, 4, 5 -- confirm against the codebook.
us$interest <- car::recode(us$Q45, "5 = 0; 4 = 1; 3 = 2; 1 = 3")
us$interest <- us$interest / 3 ## rescale 0-1

## 3. participation --

part_vars <- paste0("part", 1:8) ## same items as in Danish study
psych::alpha(us[, part_vars]) ## reliability of the participation items

## within-individual participation score: sum of the items divided by the
## number of items (i.e. the mean). The divisor is taken from part_vars so it
## cannot drift from the item list. rowSums() is called without na.rm, so a
## respondent with any missing item gets NA.
us$part_full <- rowSums(us[, part_vars]) / length(part_vars)


## narcissism -- (full narcissism battery)
## Rename the NPI item columns to PI1..PI40 (renaming for convenience here,
## earlier script compatibility). The columns are found by name pattern and
## renamed in their existing order, so we first check that exactly 40 columns
## match: with any other count the assignment below would recycle or truncate
## the new names and mislabel items.
npi_cols <- grep("npi", names(us))
stopifnot(length(npi_cols) == 40L)
names(us)[npi_cols] <- paste0("PI", 1:40)
npi_vars <- paste0("PI", 1:40)
psych::alpha(us[, npi_vars])

## 7-factor item sets (positions index into npi_vars, i.e. item numbers)
aut_vars <- npi_vars[c(1, 8, 10, 11, 12, 32, 33, 36)] ## Authority
exh_vars <- npi_vars[c(2, 3, 7, 20, 28, 30, 38)] ## Exhibitionism (not in DK)
sup_vars <- npi_vars[c(4, 9, 26, 37, 40)] ## Superiority
exp_vars <- npi_vars[c(6, 13, 16, 23, 35)] ## Exploitativeness
ent_vars <- npi_vars[c(5, 14, 18, 24, 25, 27)] ## Entitlement
van_vars <- npi_vars[c(15, 19, 29)] ## Vanity
suf_vars <- npi_vars[c(17, 21, 22, 31, 34, 39)] ## Self-sufficiency

psych::alpha(us[, aut_vars]) ## 0.82 reliability
psych::alpha(us[, exh_vars]) ## 0.74 reliability
psych::alpha(us[, sup_vars]) ## 0.68 reliability
psych::alpha(us[, exp_vars]) ## 0.63 reliability
psych::alpha(us[, ent_vars]) ## 0.60 reliability
psych::alpha(us[, van_vars]) ## 0.75  reliability
psych::alpha(us[, suf_vars]) ## 0.52 reliability

## 3-factor variables (25 of the 40 items)
lead_auth   <- c("PI1", "PI5", "PI10", "PI11", "PI12", "PI27", "PI32", "PI33", "PI34", "PI36", "PI40")
grand_exhib <- c("PI4", "PI7", "PI15", "PI19", "PI20", "PI26", "PI28", "PI29", "PI30", "PI38")
ent_exp     <- c("PI13", "PI14", "PI24", "PI25")
npi25_vars <- c(lead_auth, grand_exhib, ent_exp)

psych::alpha(us[, lead_auth]) ## 0.84 reliability
psych::alpha(us[, grand_exhib]) ## 0.81 reliability
psych::alpha(us[, ent_exp]) ## 0.55 reliability
psych::alpha(us[, npi25_vars]) ## 0.88 reliability

## 7-factor model, CFA check
## Model syntax: one latent factor per line, measured (=~) by the same items
## as the aut/exh/sup/exp/ent/van/suf item sets defined above.
## cfa() is not namespace-qualified; presumably lavaan::cfa, attached as a
## dependency of semTools -- confirm.
npi.factor <-  'auth =~ PI1 + PI8 + PI10 + PI11 + PI12 + PI32 + PI33 + PI36
exhib =~ PI2 + PI3 + PI7 + PI20 + PI28 + PI30 + PI38
sup =~ PI4 + PI9 + PI26 + PI37 + PI40
exp =~ PI6 + PI13 + PI16 + PI23 + PI35
ent =~ PI5 + PI14 + PI18 + PI24 + PI25 + PI27
van =~ PI15 + PI19 + PI29
suf =~ PI17 + PI21 + PI22 + PI31 + PI34 + PI39'
## all 40 items are declared as ordered (categorical) indicators
npi.factor.fit <- cfa(npi.factor, data = us, ordered = npi_vars)
summary(npi.factor.fit, fit.measures = TRUE)

# check the 3-factor model
# Same items as lead_auth / grand_exhib / ent_exp above (25 items in total).
npi.3factor <-  'lead_auth =~ PI1 + PI5 + PI10 + PI11 + PI12 + PI27 + PI32 + PI33 + PI34 + PI36 + PI40
grand_exh =~ PI4 + PI7 + PI15 + PI19 + PI20 + PI26 + PI28 + PI29 + PI30 + PI38
ent_exp   =~ PI13 + PI14 + PI24 + PI25'
# NOTE(review): `ordered` lists all 40 items although this model uses only 25;
# presumably lavaan ignores the ones not in the model -- confirm, or pass
# npi25_vars instead.
npi.3factor.fit <- cfa(npi.3factor, data = us, ordered = npi_vars)
summary(npi.3factor.fit, fit.measures = TRUE, standardized = TRUE)

# extract 7 and 3-factor results

## Build a table of standardized factor loadings from a fitted model.
##   fit            fitted model object accepted by standardizedSolution()
##   factor_labels  named character vector: names are the latent-variable
##                  names used in the model syntax, values are the display
##                  labels; its order defines the order of the factor levels
## Returns a data.frame with columns lhs (factor with display labels),
## rhs ("Item<k>"), est.std and se (rounded to 3 digits) and label
## ("est.std (se)").
loading_table <- function(fit, factor_labels) {
  sol <- data.frame(standardizedSolution(fit))
  ## keep the loadings by operator instead of relying on their row positions
  tab <- sol[sol$op == "=~", c("lhs", "rhs", "est.std", "se")]
  num_cols <- c("est.std", "se")
  tab[, num_cols] <- round(tab[, num_cols], 3)
  ## a standard error that rounds to 0 is reported as 0.001, except where the
  ## standardized loading is exactly 1; the column is named in full rather
  ## than via `$` partial matching on "est"
  tab$se[tab$se == 0 & tab$est.std != 1] <- 0.001
  tab$rhs <- stringr::str_replace(tab$rhs, "PI", "Item")
  ## exact lookup of the display label; as.character() guards against lhs
  ## being a factor, which would otherwise index by integer code
  tab$lhs <- factor(unname(factor_labels[as.character(tab$lhs)]),
                    levels = unname(factor_labels))
  tab$label <- paste0(tab$est.std, " (", tab$se, ")")
  tab
}

# 7-factor
fit_40 <- loading_table(npi.factor.fit,
                        c(auth  = "Authority",
                          exhib = "Exhibitionism",
                          sup   = "Superiority",
                          exp   = "Exploitativeness",
                          ent   = "Entitlement",
                          van   = "Vanity",
                          suf   = "Self-sufficiency"))
fit_40_us13 <- fit_40 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

# 3-factor
fit_25 <- loading_table(npi.3factor.fit,
                        c(lead_auth = "Leadership/Authority",
                          grand_exh = "Grandiose/Exhibitionism",
                          ent_exp   = "Entitlement/Exploitativeness"))
fit_25_us13 <- fit_25 %>%
  group_by(lhs) %>%
  dplyr::select(lhs, rhs, label)

# print both loading tables
fit_25_us13
fit_40_us13



## scale scores: for every (sub)scale, sum the items per respondent and divide
## by the number of items. rowSums() is used without na.rm, so a respondent
## with any missing item on a scale gets NA for that scale.
## The list order is the order in which the columns are added to `us`.
score_items <- list(
  ## full 40-item battery and its 7 subscales
  npi        = npi_vars,
  authority  = aut_vars,
  exhib      = exh_vars,
  exploit    = exp_vars,
  entitle    = ent_vars,
  sufficient = suf_vars,
  vanity     = van_vars,
  superior   = sup_vars,
  ## 25-item version and its 3 subscales
  npi25      = npi25_vars,
  leadauth   = lead_auth,
  entexp     = ent_exp,
  grandexhib = grand_exhib
)
for (score in names(score_items)) {
  items <- score_items[[score]]
  us[[score]] <- rowSums(us[, items]) / length(items)
}

# store out narcissism variable names
narc_vars <- c("npi", "authority", "exhib", "superior", "exploit",
               "entitle", "vanity", "sufficient")
narc25_vars <- c("npi25", "leadauth", "grandexhib", "entexp")

# means/SD, formatted as "mean (sd)" with two decimals, missing values dropped
mean_sd <- function(x) {
  m <- round(mean(x, na.rm = TRUE), 2)
  s <- round(sd(x, na.rm = TRUE), 2)
  paste0(m, " (", s, ")")
}

apply(us[, narc_vars], 2, mean_sd)

apply(us[, narc25_vars], 2, mean_sd)


## variable recoding and export for regression
## copy vote2012 into turnout_2012 (turnout in 2012 presidential elections),
## then keep only variables of future interest: the two leading columns plus
## everything from `female` through the newly added `turnout_2012`
us <- us %>%
  dplyr::mutate(turnout_2012 = vote2012) %>%
  dplyr::select(uID, sample, female:turnout_2012)

## 2SD standardization of variables of interest
## each listed variable gets a companion column with the suffix "_sd",
## computed by two_sd() (not defined in this file; presumably from helper.R)
narc_comp <- c("npi", "authority", "exploit", "sufficient",
               "entitle", "vanity", "superior", "exhib",
               narc25_vars)
to_recode <- c(narc_comp, "age")
rec_vars <- paste0(to_recode, "_sd") ## two_sd standardization

us[, rec_vars] <- apply(us[, to_recode], 2, two_sd)

## add study ID and write the regression data set
us$study <- "US13"
write.csv(us, file = "./data/us13-reg.csv", row.names = FALSE)

# R version 3.5.1 (2018-07-02)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS  10.15.4
# 
# Matrix products: default
# BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] semTools_0.5-1 lavaan_0.6-3   stringr_1.4.0  dplyr_0.8.3    psych_1.8.12  
# 
# loaded via a namespace (and not attached):
#   [1] zip_1.0.0         Rcpp_1.0.2        cellranger_1.1.0  pillar_1.4.2      compiler_3.5.1    forcats_0.4.0    
# [7] tools_3.5.1       zeallot_0.1.0     tibble_2.1.3      nlme_3.1-137      lattice_0.20-35   pkgconfig_2.0.3  
# [13] rlang_0.4.2       openxlsx_4.1.0    cli_1.1.0         rstudioapi_0.10   curl_3.3          yaml_2.2.0       
# [19] parallel_3.5.1    pbivnorm_0.6.0    haven_2.2.0       rio_0.5.16        vctrs_0.2.1       hms_0.5.3        
# [25] stats4_3.5.1      grid_3.5.1        tidyselect_0.2.5  glue_1.3.1        data.table_1.11.8 R6_2.4.0         
# [31] fansi_0.4.0       readxl_1.3.1      foreign_0.8-70    carData_3.0-2     purrr_0.3.3       car_3.0-2        
# [37] magrittr_1.5      backports_1.1.5   abind_1.4-5       assertthat_0.2.1  mnormt_1.5-5      utf8_1.1.4       
# [43] stringi_1.4.3     crayon_1.3.4  